********************************************************************************
* clean_mna.do
* Purpose: Construct the M&A deal-level datasets from the SDC Platinum
*          M&A database linked to NALMF firm identifiers.
*
* Input : $data0/mna_linked_to_nalmf.dta  (SDC deals x NALMF firm IDs)
*         $data2/mna_linked_to_nalmf_with_naics (industry overlap indicators)
*
* Outputs:
*   all_mna.dta        - All completed deals (firm x deal level, both parties)
*   first_mna.dta      - First M&A event per firm (used to define treatment)
*   repeat_acquirer.dta - Number of deals per acquirer firm
********************************************************************************

//------------------------------------------------------------------------------
// SECTION 1: BUILD THE FULL M&A DEAL DATASET (all_mna.dta)
//------------------------------------------------------------------------------
use $data0\mna_linked_to_nalmf.dta , clear

* Keep only completed deals (Status == "C"); drop pending/withdrawn deals
keep if Status == "C"

* Drop share buybacks (not true M&A transactions between two separate firms)
drop if Form == "Buyback"

* Drop deals where the target and acquirer are the same firm (self-dealing).
* For each deal, record the counterparty's firm ID: the acquirer sees the target,
* and the target sees the acquirer.
gsort DEAL -Acquirer
bys DEAL: gen 		other_party_id = entid_syn[_n-1] if Acquirer == 0
bys DEAL: replace 	other_party_id = entid_syn[_n+1] if Acquirer == 1
drop if entid_syn == other_party_id

* Merge in indicators for whether the deal occurred within the same industry
* or sector (based on NAICS codes of both parties).
* within_all    = 1 if acquirer and target share the same 4-digit NAICS
* within_sector = 1 if they share the same 2-digit NAICS sector
merge 1:1 DEAL Acquirer using $data2/mna_linked_to_nalmf_with_naics, keepusing(within_all within_sector naics_other) keep(1 3) nogen

* Recode deal form into three categories:
*   1 = Asset acquisition (Acq. of Assets / Acq. of Certified Assets)
*   2 = Interest acquisition (Majority, Partial, or Remaining interest)
*   3 = Merger
gen 	Form_code = 1 if Form == "Acq. Cert. Asts." | Form == "Acq. of Assets"
replace Form_code = 2 if Form == "Acq. Maj. Int." | Form == "Acq. Part. Int." | Form == "Acq. Rem. Int."
replace Form_code = 3 if Form == "Merger"

* Binary indicator: merger == 1 for true mergers (Form_code == 3)
gen 	merger = (Form_code == 3)
replace merger = . if Form_code == .

* Keep only the variables needed downstream
keep DEAL Acquirer DEAL_YEAR merger entid_syn other_party_id within_all

compress
save $data/all_mna, replace


//------------------------------------------------------------------------------
// SECTION 2: FIRST M&A EVENT PER FIRM (first_mna.dta)
//
// For firms with multiple M&A events, keep only the earliest deal.
// This is the definition of "treatment" used in the matching and analysis.
//------------------------------------------------------------------------------
preserve

* Sort by firm and year so duplicates drop keeps the earliest deal
gsort entid_syn DEAL_YEAR DEAL
duplicates drop entid_syn, force

keep DEAL Acquirer DEAL_YEAR merger entid_syn other_party_id within_all

compress
save $data/first_mna, replace

//------------------------------------------------------------------------------
// SECTION 3: REPEAT ACQUIRER COUNT (repeat_acquirer.dta)
//
// Count the number of deals per acquirer firm (used for heterogeneity analysis).
//------------------------------------------------------------------------------
restore
preserve

* Collapse to one row per firm-role, counting deals
collapse (count) num_deal = DEAL, by(entid_syn Acquirer)

* Keep only acquirers (Acquirer == 1)
keep if Acquirer == 1

compress
save $data/repeat_acquirer, replace
